/*
MATCHING 
*/
* Drop every program defined by this do-file so that re-running the file
* redefines them from scratch. The list is assembled in pieces, so no
* delimiter switch is needed, and each program name appears only once.
* capture keeps the loop going when a program has not been defined yet.
local programs "matching matching_controls propensity_score_match"
local programs "`programs' predicted_procomer_match nearest_neighbor_match"
local programs "`programs' procomer sl_info characteristics_proco transactions_proco merging_procomer"
local programs "`programs' regressions_proco ps_probs ps_reg_prep desc_samples"
local programs "`programs' ps_reg predict_scores closest_proco ps data nearest_neighbors"
local programs "`programs' neighbors treated_and_control treatment subsamples"
local programs "`programs' confidence_intervals sample_bigdom_matching sample_exp_matching ps_controls"
local programs "`programs' myttests run_reg_match figures to_match"
foreach progs of local programs {
    capture program drop `progs'
}
	
********************** 
***  INPUT FILES   ***  
**********************
* Paths to the input datasets, stored as globals. Each path is written with
* doubled quotes; presumably this keeps one pair of quotes inside the macro
* so that it expands to a quoted file name (TODO confirm).
global occup ""raw_data/CCSS_codetexteduc.dta""
global MNCs ""processed_data/MNC_sample.dta""
global SL_info ""raw_data/SL_info.dta""
* Event-time dummies to show in tables and graphs. Event time -1 is not in
* the list.
global dummies "D_m4 D_m3 D_m2 D_0 D_1 D_2 D_3 D_4"

********************************************************************************
*Table 3, Supplementary Tables D1 and D3
*Matching estimators
********************************************************************************
{
program define matching
    * Entry point for the matching estimators. Sets the globals shared by the
    * sub-programs (outcome names and variables, fixed effects, standard
    * errors, number of neighbors, matching controls), runs each estimator,
    * and writes Table 3a and Supplementary Table D1a with esttab.
    display "Matching estimators"

    *outcome labels (name_#) and the variable lists behind them (outcome_#)
    global name_1 "Total_Sales"
    global outcome_1 "y"
    global name_2 "Number_of_Workers"
    global outcome_2 "l"
    global name_3 "TFP_CD_OLS"
    global range_${name_3} "-0.05(0.1)0.15"
    global outcome_3 "y k l m"
    global name_4 "Corp_Sales_to_Others"
    global range_${name_4} "-1.5(0.5)1"
    global outcome_4 "trans_others"
    global name_5 "Number_of_Corp_Buyers"
    global outcome_5 "other_clients"
    global name_6 "Ave_Corp_Sales_Others"
    global outcome_6 "ave_trans"
    global first_outcome = 1
    global last_outcome = 6

    *concatenate every outcome variable list into the global outcomes
    global outcomes ""
    forvalues k = $first_outcome / $last_outcome {
        global outcomes "$outcomes ${outcome_`k'}"
    }
    estimates clear

    *fixed effects and clustering
    global fixed_effects "year#sector4#province IDs"
    global SE "cluster province#event"

    *number of nearest neighbors
    global neigh = 3

    *controls used for the matching
    global vars "y l w sh_educ experience_suppliers experience_MNC entry exporter importer supp_bigdom supp_exp n_suppliers n_clients cum_length_buyers"

    *matching controls: variables that we will use for the matching
    quietly matching_controls

    *Table 3 and Table D2 (part b1): propensity score matching
    propensity_score_match

    *Table D2 (part c1): predicted procomer score matching
    predicted_procomer_match

    *Table 3 (part b) and Supplementary Table D1 (part b): nearest neighbor matching
    nearest_neighbor_match

    *Table D3: alternative empirical strategies (Productive Linkages)
    quietly procomer

    *MAIN TABLE 3 (part a): sales, CD OLS productivity, Corp Sales to Others
    estfe proco_1 proco_3 proco_4 ps_zdiagn_1 ps_zdiagn_3 ps_zdiagn_4 ps_1 ps_3 ps_4, ///
        labels(IDs "Firm FE" year "Year FE" year#sector4 "Year-4DSect FE" year#sector4#province "Year-4DSect-Prov FE")
    return list
    esttab proco_1 proco_3 proco_4 ps_zdiagn_1 ps_zdiagn_3 ps_zdiagn_4 ps_1 ps_3 ps_4 ///
        using ${path0}/results/0-main_text/Table_3a.tex, ///
        indicate(`r(indicate_fe)') star(* 0.10 ** 0.05 *** 0.01) keep(${dummies1}) ///
        cells("b(fmt(3) label($\beta$) star )" "se(par fmt(3) label((SE)) )" ) ///
        stats(r2_a mean_dep sd_dep n_obs n_fe n_treated n_control , fmt(a2) labels("Adjusted R$^2$" "Mean Dep. Var. (level)" "SD Dep. Var. (level)" "\# Observations" "\# Fixed Effects" "\# Treated" "\# Control" )) ///
        mlabel("Total Sales" "TFP CD OLS" "Corp. Sales to Others" "Total Sales" "TFP CD OLS" "Corp. Sales to Others" "Total Sales" "TFP CD OLS" "Corp. Sales to Others") alignment(c) ///
        replace noconstant label nodepvar collabels(none) compress type

    *APPENDIX TABLE D1 (part a): total employment, number other buyers, average sales to others
    estfe proco_2 proco_5 proco_6 ps_zdiagn_2 ps_zdiagn_5 ps_zdiagn_6 ps_2 ps_5 ps_6, ///
        labels(IDs "Firm FE" year "Year FE" year#sector4 "Year-4DSect FE" year#sector4#province "Year-4DSect-Prov FE")
    return list
    esttab proco_2 proco_5 proco_6 ps_zdiagn_2 ps_zdiagn_5 ps_zdiagn_6 ps_2 ps_5 ps_6 ///
        using ${path0}/results/4-appendix_d/Supplementary_Table_D1a.tex, ///
        indicate(`r(indicate_fe)') star(* 0.10 ** 0.05 *** 0.01) keep(${dummies1}) ///
        cells("b(fmt(3) label($\beta$) star )" "se(par fmt(3) label((SE)) )" ) ///
        stats(r2_a mean_dep sd_dep n_obs n_fe n_treated n_control , fmt(a2) labels("Adjusted R$^2$" "Mean Dep. Var. (level)" "SD Dep. Var. (level)" "\# Observations" "\# Fixed Effects" "\# Treated" "\# Control" )) ///
        mlabel("Number Workers" "Number Other Corp. Buyers" "Ave. Corp. Sales to Others" "Number Workers" "Number Other Corp. Buyers" "Ave. Corp. Sales to Others" "Number Workers" "Number Other Corp. Buyers" "Ave. Corp. Sales to Others") alignment(c) ///
        replace noconstant label nodepvar collabels(none) compress type
end

********************************************************************************
*Tables D1 and D3,  Tables A8 and A9, Figures A3, A4, A5
*(part b of D1 is in the main text code with the matching estimators)
*Alternative Empirical Strategies: ``Productive Linkages" 
********************************************************************************
program define procomer
    * Runs the procomer pipeline step by step (each step silenced) and then
    * exports every stored estimate named proco_* to Supplementary Table D3.

    *SL info
    quietly sl_info

    *firm characteristics
    quietly characteristics_proco

    *transactions
    quietly transactions_proco

    *merging the previous datasets
    quietly merging_procomer

    *regressions on the procomer sample
    quietly regressions_proco

    *table losers: procomer
    quietly esttab proco_* using ${path0}/results/4-appendix_d/Supplementary_Table_D3.tex, ///
        star(* 0.10 ** 0.05 *** 0.01) varwidth(25) ///
        keep( $dummies0 ) ///
        cells("b(fmt(3) label(\beta) star )" "se(par fmt(3) label((SE)) )" ) ///
        stats(r2_a n_obs  n_treated n_control, fmt(a2) labels("Adjusted R$^2$" "\# Observations" "\# Treated" "\# Control" )) ///
        mlabel( $labels $labels ) alignment(c) replace noconstant label nodepvar collabels(none) type
end

***************
*program: sl_info
***************
program define sl_info
    * Builds two temporary files from the SL info data:
    *   temp/sl.dta      one row per ID, taking the row with the largest winner value
    *   temp/buyers.dta  the distinct buyer values, stored as strings
    display "program: sl_info"
    quietly {
        use $SL_info, clear

        *one row per ID: sorting on minus winner puts the largest winner value first
        preserve
        generate minus_winner = -winner
        sort ID minus_winner, stable
        by ID: generate dup = cond(_N==1, 0, _n)
        drop if dup > 1
        *MNC0 is blanked whenever the kept row is not a winner
        replace MNC0 = "" if winner != 1
        keep ID event MNC0 winner monto
        save temp/sl.dta, replace
        restore

        *list of buyers: collapse to one row per buyer and discard the collapsed value
        collapse event, by(buyer)
        drop event
        tostring buyer, replace
        save temp/buyers.dta, replace
    }
end

*************************
*program: firm characteristics
*************************
program define characteristics_proco
    * Builds the firm-year characteristics panel from the balance sheet data.
    * Money variables are rescaled with the yearly ipc globals, the data are
    * reshaped to long format and restricted to 2008-2017. Two files are saved:
    *   temp/sellers0.dta  all firm-years
    *   temp/sellers.dta   firm-years that are not master-only in the merge with temp/sl.dta
    display "program: firm characteristics"
    quietly {
        *using balance sheet data
        use $revec_group, clear
        keep ID sophistication corp empresarial estado_* sector_institucional ///
            ipubgrupo inolucrogrupo nombre_r provincia firm_AE firm_ciiu4 ingresosir_* ///
            trabaj_* salarios_* va_* exports_* imports_* entry exit sector_g ZF* ///
            total_activo_neto_* activos_fijos_* costo_de_ventas_* ventas_* trab_calif_*
        merge 1:1 ID using temp/sl.dta

        *converting to real values, one year at a time
        forvalues yr = 2005/2017 {
            *million colones
            foreach stub in ingresosir va costo_de_ventas total_activo_neto activos_fijos ventas {
                replace `stub'_`yr' = (100*(`stub'_`yr')/${ipc`yr'})/1000000
                format `stub'_`yr' %16.0g
            }
            *salaries in th colones (absolute value taken first)
            replace salarios_`yr' = (100*abs(salarios_`yr')/${ipc`yr'})/1000
            format salarios_`yr' %16.0g
            *imports and exports are in dollars (absolute value taken first)
            foreach stub in exports imports {
                replace `stub'_`yr' = (100*abs(`stub'_`yr')/${ipc`yr'})/1000000
                format `stub'_`yr' %16.0g
            }
        }

        *reshaping to one row per ID and year
        reshape long ingresosir_ costo_de_ventas_ trabaj_ estado_ salarios_ va_ exports_ imports_ ZF_broad_ total_activo_neto_ activos_fijos_ ventas_ trab_calif_, i(ID) j(year)

        compress
        drop if ingresos==. & trabaj==.

        *we keep data for the same period as in the transactions dataset for consistency
        drop if year < 2008
        drop if year >= 2018
        rename ID seller

        *saving the full panel
        save temp/sellers0.dta, replace

        *dropping firms that appear only in the balance sheet data
        drop if _m==1
        drop _m
        compress

        *sector codes at 4, 3 and 2 digits
        destring firm_ciiu4, gen(sector4)
        generate sector3 = floor(sector4/10)
        generate sector2 = floor(sector3/10)

        *saving the restricted panel
        save temp/sellers.dta, replace
    }
end

*******************
*program: transactions
*******************
program define transactions_proco
    * Prepares the transactions of the sellers found in temp/sellers: deflates
    * amounts, flags transactions with the triggering buyer, saves the
    * transaction-level file temp/trans.dta and the seller-year aggregates
    * temp/fore_trans.dta.
    display "program: transactions"
    quietly {
        use $transactions, clear
        drop if pot==1
        drop pot

        *keep sellers with at least one year matched to temp/sellers
        merge m:1 seller year using temp/sellers
        bysort seller: egen max_m = max(_m)
        keep if max_m==3

        *transactions to real value, first monto_tot and then trans_
        foreach v in monto_tot trans_ {
            forvalues yr = 2008/2017 {
                replace `v' = (100*(`v')/${ipc`yr'})/1000000 if year==`yr'
            }
        }
        drop _m max_m

        *buyers listed in temp/buyers are flagged as mnc_buyer
        merge m:1 buyer using temp/buyers
        generate mnc_buyer = (_m==3)
        generate trigg = (MNC0==buyer & MNC0!="")

        *seller-years at the event with no triggering transaction
        bysort seller year: egen max_t = max(trigg)
        replace max_t = . if year!=event

        *in those seller-years the largest transaction takes the value of monto
        *and is marked as the triggering one
        generate minus_trans = -trans
        sort seller year minus_trans, stable
        by seller year: generate pos = _n
        replace trans = monto if pos==1 & event==year & max_t==0
        replace trigg = 1 if pos==1 & event==year & max_t==0
        drop max_t pos

        *position and count of the triggering transactions within seller
        sort seller trigg year, stable
        by seller trigg: generate spell = _n if trigg!=0
        by seller trigg: generate spelltot = _N if trigg!=0
        generate eventyear = spell - 1

        *rows that come only from temp/buyers are dropped
        drop if _m==2
        drop _m

        *saving the transaction-level data
        save temp/trans.dta, replace

        *totals by seller and year: triggering transactions and all transactions
        generate double trans2 = trans*trigg
        bysort seller year: egen double tot_trans_trig = total(trans2)
        drop trans2
        bysort seller year: egen double all_trans = total(trans)
        generate n_clients = 1
        generate n_trig = trigg
        compress

        collapse (sum) n_clients n_trig (mean) event all_trans tot_trans_trig ingres, by(year seller)

        rename seller ID
        save temp/fore_trans, replace
    }
end

******************************
*program: merging previous datasets
******************************
program define merging_procomer
    * Combines the SL info rows with the seller panel (all pairwise matches on
    * seller) and adds the seller-year transaction aggregates. The result is
    * saved as temp/merged.dta.
    display "merging_procomer"
    use $SL_info, clear

    *every SL info row is paired with every year of the same seller
    rename ID seller
    joinby seller using temp/sellers
    rename seller ID

    *transaction aggregates; rows found only in temp/fore_trans are dropped
    merge m:1 ID year using temp/fore_trans
    drop if _m==2
    drop _m

    *winners first within case, then by ID and year
    gsort case -winner_d ID year
    order case winner_d ID year, first
    save temp/merged.dta, replace
end

***********************
*program: regressions proco
***********************
program define regressions_proco
    * Event-study regressions on the procomer sample. Builds the outcomes and
    * the event-time dummies (D_ for event time, T_ for event time interacted
    * with winner_d), then runs one regression per outcome through
    * run_reg_match and stores it as proco_#.
    display "regression results"
    quietly {
        use temp/merged.dta, clear

        *the third outcome is redefined here without m
        global outcome_3 "y k l"
        capture drop D_*
        capture drop T_*
        capture drop time
        generate time = year - event
        sort ID year, stable
        generate profits_ = va_ - sala

        *outcomes in logs
        generate y = log(ingresos)
        generate l = log(trabaj)
        generate k = log(total_activo_neto_)
        generate m = log(costo_de)
        generate va = log(va_)
        egen province = group(provincia)

        *number of clients
        replace tot_trans_trig = 0 if tot_trans_trig==. & year>=2008
        generate sales_not_MNC = ingresosir - tot_trans_trig if year>=2008
        replace sales_not_MNC = 0 if sales_not_MNC<0
        generate l_sales_not_MNC = log(sales_not_MNC)
        replace n_clients = 0 if n_clients==. & year>=2008
        replace n_trig = 0 if n_trig==. & year>=2008

        *trans to others
        generate trans_others = log(all_trans - tot_trans_trig)
        generate other_clients = log(n_clients - n_trig + 1) if trans_others!=.
        generate ave_trans = trans_others - other_clients

        *saving temp data
        save temp/temp_analysis.dta, replace

        *event study dummies; the first and last event times absorb the tails
        scalar first_year = -5
        scalar last_year = 5
        forvalues i = `=first_year'(1)`=last_year' {
            if `i' < 0 {
                *negative event times are named with the prefix m
                scalar j = abs(`i')
                generate D_m`=j' = (time==`i')
                label var D_m`=j' "Losers, t $ =`i'$"
                generate T_m`=j' = winner_d*(time==`i')
                label var T_m`=j' "Diff Winners, t $ =`i'$"
                if `i'==`=first_year' {
                    replace D_m`=j' = (time<=`i' & time!=.)
                    label var D_m`=j' "Losers, t $\leq `i'$"
                    replace T_m`=j' = winner_d*(time<=`i' & time!=.)
                    label var T_m`=j' "Diff Winners, t $\leq `i'$"
                }
            }
            else {
                generate D_`i' = (time==`i')
                label var D_`i' "Losers, t $ =`i'$"
                generate T_`i' = winner_d*(time==`i')
                label var T_`i' "Diff Winners, t $ =`i'$"
                if `i'==`=last_year' {
                    replace D_`i' = (time>=`i' & time!=.)
                    label var D_`i' "Losers, t $\geq `i'$"
                    replace T_`i' = winner_d*(time>=`i' & time!=.)
                    label var T_`i' "Diff Winners, t $\geq `i'$"
                }
            }
        }

        *event time minus one is the omitted category
        drop D_m1 T_m1
        order D_* T_*, last

        *dummies shown in the tables: event times -4 to 4
        ds D_m4-D_4
        global dummies0 `r(varlist)'
        ds T_m4-T_4
        global dummies1 `r(varlist)'

        egen IDs = group(ID)
        egen group_ID = group(ID case)
        global last_year = `=last_year'
        global first_year = `=first_year'
        generate matching = (winner_d)
    }

    quietly {
        *one regression per outcome; the labels global collects the outcome names
        global labels ""
        forvalues i = $first_outcome / $last_outcome {
            capture drop res
            global REG "reghdfe ${outcome_`i'} D_* T_*  , absorb(year case IDs) vce(robust)"
            global outcome "${name_`i'}"
            global labels "$labels ${name_`i'}"
            * NOTE(review): neither name below is among the outcome names set in
            * the matching program, so this branch looks unreachable - confirm
            if "$outcome" == "Employment" | "$outcome" == "Number_of_Other_Clients" {
                global quant "1"
            }
            run_reg_match

            estimates store proco_`i'
            capture drop res
            local outcome "$outcome"
        }
    }
end

********************************************************************************
*propensity scores matching
********************************************************************************
program define propensity_score_match
    * Driver for propensity score matching: fixes the seed, sets the globals
    * ps_type and procos, then runs the four steps in order.
    display "Figure 3 and Table D2: propensity scores matching"
    set seed 123
    global ps_type "Propensity Score"
    global procos ""

    display "propensity scores matching"

    *probabilities of being in t= event
    ps_probs

    *regression preparations
    quietly ps_reg_prep

    *descriptives of the matched and unmatched samples
    desc_samples

    *regression
    ps_reg
end

********************************************************************************
*
*propensity scores matching: details
*
********************************************************************************
{
********************************************************************************
*propensity score: probabilities of being in t= event
********************************************************************************
program define ps_probs
    * Estimates the probit for the propensity score, exports it to
    * Supplementary Table D2 b1, predicts the score and, for each event year,
    * calls ps_controls to find controls for the firms treated in that year.
    * The matched sample ends up in temp/temp_data_ps.dta.
    display "propensity score: probabilities of event"
    quietly {
        *loading data
        use temp/analysis0.dta, clear

        *drop old/past suppliers
        drop if past==1

        *year dummies; 2010 and 2015 absorb the earlier and later years
        forvalues yr = 2010/2015 {
            generate years_`yr' = (year==`yr')
        }
        replace years_2010 = (year<=2010)
        replace years_2015 = (year>=2015)
        compress

        generate dummy_event = (event==year)

        *probit for propensity score
        quietly probit dummy_event $vars years* i.sector4 i.province
        estimates store probit
    }

    *table
    esttab probit using ${path0}/results/4-appendix_d/Supplementary_Table_D2_b1.tex, ///
        star(* 0.10 ** 0.05 *** 0.01) varwidth(25) ///
        keep( $vars ) ///
        cells("b(fmt(3) label(\beta) star )" "se(par fmt(3) label((SE)) )" ) ///
        alignment(c) replace noconstant label nodepvar collabels(none) type

    quietly {
        *predicted conditional probability
        predict pred_prob

        *range of years in which we have events
        summarize event
        global min_event = `r(min)'
        global max_event = `r(max)'

        *treated firms in main regression: treated firms without residuals are dropped
        reghdfe y D_* if past!=1, absorb(sector4#year#province IDs ) resid(res)
        bysort ID: egen mean_res = mean(res)
        drop if mean_res==. & treated==1
        drop *res*

        *saving temp file
        save temp/temp_data_ps.dta, replace
    }

    *loop over event years
    forvalues yr = $min_event / $max_event {
        quietly {
            global year_ = `yr'

            *previous data
            use temp/temp_data_ps.dta, clear

            *each regression is for obs in a given year only
            keep if year==$year_

            *a number for each firm treated this year, over which ps_controls iterates
            sort ID, stable
            egen treated_number = group(ID) if event==$year_
        }

        *closest in propensity score
        ps_controls
    }

    quietly {
        *use previous data
        use temp/temp_data_ps.dta, clear
        drop event

        *merging with treated firms and their neighbors
        joinby ID using temp/temp_data2_ps.dta
        save temp/temp_data_ps.dta, replace
    }
end
}
********************************************************************************
*predicted procomer score matching
********************************************************************************
program define predicted_procomer_match
    * Driver for matching on the predicted procomer score: fixes the seed,
    * sets the globals ps_type and procos, then runs the five steps in order.
    set seed 123
    display "Figure 3 and Table D1: predicted procomer score matching"
    global ps_type "Predicted Scores"
    global procos "_zdiagn"

    *predicting scores in the full sample
    quietly predict_scores

    *closest in predicted procomer score
    closest_proco

    *regression preparations
    quietly ps_reg_prep

    *descriptives of the matched and unmatched samples
    desc_samples

    *regression
    quietly ps_reg
end

********************************************************************************
*
*matching based on predicted scores: details
*
********************************************************************************
{
********************************************************************************
*predicting scores in the full sample
********************************************************************************
prog predict_scores
* Predicts the procomer score for every firm-year in temp/analysis0.dta.
* The score is regressed on the matching controls with sector fixed effects
* in the sample that has observed scores. The regression is exported to
* Supplementary Table D2 c1, and its coefficients and fixed effects are then
* applied to the analysis sample. Output: temp/temp_scores.dta.

*forward slash so that the path also resolves outside Windows, like the
*other paths in this file
global scores ""raw_data/proco_scores.dta""

*scores, no time 
* NOTE(review): the tempfile sco saved below is not used again in this program
use $scores, clear
gsort ID -proco 
by ID: gen pos=_n 
drop year
reshape wide proco_score, i(ID) j( pos)
tempfile sco 
save `sco', replace 

*merge revec
use $revec_group, clear
keep ID ingresosir_* trabaj_* total_activo_neto_* costo_de_ventas_* /// 
sector_g provincia
reshape long ingresosir_ trabaj_ total_activo_neto_ costo_de_ventas_, i(ID) j(year)
drop if ingresosir_==.
*merging other controls 
merge 1:1 ID year using temp/temp_control_vars.dta
drop if _m==2
drop _m 
*merging scores 
merge 1:1 ID year using $scores
drop if _m==2
drop _m
*keep if proco_score!=. 
 
*regressions
gen y=log(ingresosir_/500/1000)
gen l=log(trabaj_)
gen k=log(total_activo/500/1000)
gen m=log(costo_de_ventas_/500/1000)
label var y "Log sales"
label var l "Log employment"
label var w "Average wages"
label var sh_educ "Share of college workers"
label var experience_MNC "Employees w/ exp. at MNCs"
label var experience_suppliers "Employees w/ exp. at supp. of MNCs"
label var entry "Year of firm entry"
label var exporter "Share of exporters"
label var importer "Share of importers"
label var n_clients "Number of clients"
label var n_suppliers "Number of suppliers"
label var supp_bigdom "Suppliers to big domestic firms"
label var supp_exp "Suppliers to exporters" 
label var cum_length_buyers "Average duration (years) with buyers"

*sector groups; firms without sector go to group 0
egen sect=group(sector_g)
replace sect=0 if sect==.

*score regression; the sector fixed effects are saved in the variable fe
capture drop fe*
reghdfe proco_score $vars , absorb(fe=i.sect) 
estimates store reg_proco_score 
*table 
esttab reg_proco_score using ${path0}/results/4-appendix_d/Supplementary_Table_D2_c1.tex, ///
  star(* 0.10 ** 0.05 *** 0.01) varwidth(25) ///
keep( $vars ) ///
cells("b(fmt(3) label(\beta) star )" "se(par fmt(3) label((SE)) )" )   ///
 alignment(c) replace noconstant label nodepvar collabels(none) type

*saving one fixed effect per sector group (0 when it is missing)
preserve
collapse fe, by(sect)
replace fe=0 if fe==.
tempfile fes 
save `fes', replace 
restore 
 
*bringing the sector fixed effects to the analysis sample
use temp/analysis0.dta, clear
egen sect=group(sector_g)
replace sect=0 if sect==.
merge m:1 sect using `fes'
keep if _m==3
drop _m

*intercept plus the sector fixed effect of each observation.
*The global holds the expression itself. Defining it with an equal sign
*would evaluate fe once, at the first observation, and give every firm
*the same sector effect.
global cons "_b[_cons] + fe"
gen proco_score=$cons 
foreach v in $vars {
	local coeff=_b[`v']
	replace proco_score= proco_score+`coeff'*`v'
}

*treated firms in main regression 
reghdfe y  D_* if past!=1 , absorb(sector4#year#province IDs ) resid(res)
bys ID: egen mean_res=mean(res)
drop if mean_res==. & treated==1
drop *res*

*treated firms
drop if past==1

keep $outcomes year l ID event time treat* sector_g sector* province $vars proco_score
compress

*saving 
save temp/temp_scores.dta, replace 

end

********************************************************************************
*closest in predicted procomer score
********************************************************************************
program define closest_proco
    * For each event year, keeps the observations of that year, numbers the
    * firms treated in it and calls ps_controls to find their controls. The
    * matched sample ends up in temp/temp_data_ps.dta.
    quietly {
        *using previous data
        use temp/temp_scores.dta, clear
        generate dummy_event = (event==year)

        *range of event years for the loop
        summarize event
        global min_event = `r(min)'
        global max_event = `r(max)'

        *saving temp file
        save temp/temp_data_ps.dta, replace
    }

    forvalues yr = $min_event / $max_event {
        quietly {
            global year_ = `yr'

            *previous data, restricted to this year
            use temp/temp_data_ps.dta, clear
            keep if year==$year_
            egen treated_number = group(ID) if event==$year_
        }

        *closest in score
        ps_controls
    }

    quietly {
        *use previous data
        use temp/temp_data_ps.dta, clear
        drop event

        *merging with treated firms and their neighbors
        joinby ID using temp/temp_data2_ps.dta
        save temp/temp_data_ps.dta, replace
    }
end

}

****************************
*nearest neighbor matching
****************************
program define nearest_neighbor_match
    * Nearest neighbor matching for every outcome: point estimates on the main
    * sample, subsamples for the confidence intervals, and export of the
    * results to Table 3b and Supplementary Table D1 b.
    set seed 123

    *number of subsamples to compute confidence intervals
    global n_subsamples = 500

    *one pass per outcome
    forvalues o = $first_outcome / $last_outcome {
        global n_outcome = `o'
        global var ${outcome_`o'}
        global var_name ${name_`o'}

        *the TFP outcome is matched on the residual CD created by the data program
        if "$var_name"=="TFP_CD_OLS" {
            global var "CD"
        }

        *getting treatment effect
        global estimate_ = 1
        global subsample "main"

        *using main data
        quietly data

        *nearest neighbors for each treated firm
        display "main sample"
        nearest_neighbors

        *appending treated and control firms
        quietly treated_and_control

        *average treatment effects
        quietly treatment

        *subsamples for standard errors
        quietly subsamples

        *confidence intervals
        confidence_intervals

        *the summary line is printed three times
        forvalues rep = 1/3 {
            display "Table ${var_name}. # treated ${n_treated}. # control ${n_control}. Obs ${total_obs}"
        }

        list event coeff *5p
    }

    * NOTE(review): the appends below read from the global path while every
    * other file in this program uses path0, and capture hides any failure -
    * confirm both globals are defined

    *table 3: columns 10, 11, 12
    clear
    foreach p in 1 3 4 {
        capture append using ${path}/temp/results_nn_${name_`p'}.dta
    }
    keep event_time coeff_hat lower_bound_5p upper_bound_5p ///
        var_name n_treated n_control total_obs
    list
    dataout , ///
        save(${path0}/results/0-main_text/Table_3b.tex) tex replace

    *table D1: columns 10, 11, 12
    clear
    foreach p in 2 5 6 {
        capture append using ${path}/temp/results_nn_${name_`p'}.dta
    }
    keep event_time coeff_hat lower_bound_5p upper_bound_5p ///
        var_name n_treated n_control total_obs
    list
    dataout , ///
        save(${path0}/results/4-appendix_d/Supplementary_Table_D1_b.tex) tex replace
end
}

********************************************************************************
*
*nearest neighbor matching: details 
*
********************************************************************************
{
********************************************************************************
*nearest neighbor: using main data
********************************************************************************
program define data
    * Loads the analysis data for the current outcome (global var), numbers
    * the treated firms, stores their count in the global tot_ids and saves
    * the working file temp/temp_data_nn.dta.
    quietly {
        *loading data
        use $analysis_data, clear

        *standardizing names; each step is skipped silently when it fails
        capture rename l_not_MNC_event sales_others
        capture rename l_clients_non_event other_clients
        capture rename l_not_MNC_event_2 trans_others
        capture rename l_ave2 ave_trans
        capture replace other_clients = . if trans_others==.

        *productivity residual CD from the Cobb-Douglas regression
        capture reghdfe y k l m if past!=1, ///
            absorb(year#sector4#province) resid(CD)

        *observations with a non-missing outcome
        keep if $var !=.

        *drop old/past suppliers and number the treated firms to iterate over them
        drop if past==1
        egen treat_number = group(ID) if event!=.
        summarize treat_number
        global tot_ids = `r(max)'

        *keep relevant variables
        keep $outcomes year ID event time treat* sector_g y l sector* CD
        compress

        *saving temp file
        save temp/temp_data_nn.dta, replace
    }
end

********************************************************************************
*nearest neighbor: neighbors for each treated firm
********************************************************************************
program define nearest_neighbors
    * Loops over the treated firms (1 to the global tot_ids). For each one it
    * builds the matching variables with to_match and then selects the
    * neighbors with neighbors.
    display "${subsample}, " _continue
    forvalues firm = 1/ $tot_ids {
        quietly {
            global firm_to_match = `firm'

            *cases to match
            to_match

            *neighbors
            neighbors
        }
    }
end

********************************************************************************
*nearest neighbor: cases to match
********************************************************************************
program define to_match
    * Builds the matching variables for the treated firm numbered by the
    * global firm_to_match and for the untreated firms in its sector. The
    * result has one row per firm and is saved as temp/temp_data1_nn.dta.
    display "cases to match"

    *variables to match: the words of the global var, without repetitions
    local temp_ $var
    global match : list uniq temp_
    display "$match"

    *use previous data
    use temp/temp_data_nn.dta, clear

    *firm to match
    generate matching = (treat_number==$firm_to_match )

    *no other treated in this iteration
    drop if event!=. & matching==0

    *keeping the sector of the firm
    summarize sector4 if matching==1
    keep if sector4==`r(mean)'

    *check only one treated firm
    distinct ID if event!=.
    capass `r(ndistinct)'==1 , throw("taking more than one matched at a time")

    *earliest event time in the data decides which pre-event years are used
    egen min_time = min(time)

    *loop over the matching variables
    foreach v of global match {

        *event times -3 to -1 plus two growth rates
        if min_time<=-2 {
            forvalues lag = 3(-1)1 {
                summarize year if time==-`lag'
                capture local year_`lag'=`r(mean)'

                *value of the variable in that calendar year, copied to all rows of the firm
                capture drop temp
                capture generate temp = `v' if year==`year_`lag''
                capture bysort ID: egen `v'_control_`lag' = max(temp)
            }
            capture generate `v'_control_gr3 = `v'_control_3 / `v'_control_2 - 1
            capture generate `v'_control_growth2 = `v'_control_2 / `v'_control_1 - 1
        }

        *only event time -1 is available before the event
        if min_time==-1 {
            forvalues lag = 1/1 {
                summarize year if time==-`lag'
                capture local year_`lag'=`r(mean)'

                capture drop temp
                capture generate temp = `v' if year==`year_`lag''
                capture bysort ID: egen `v'_control_`lag' = max(temp)
            }
        }

        *no pre-event year: the event year itself is used
        if min_time==0 {
            forvalues lag = 0/0 {
                summarize year if time==-`lag'
                capture local year_`lag'=`r(mean)'

                capture drop temp
                capture generate temp = `v' if year==`year_`lag''
                capture bysort ID: egen `v'_control_`lag' = max(temp)
            }
        }
    }

    *placeholder so that the collapse always has at least one variable
    generate _control_z = 0

    *one obs per firm
    collapse *_control_* , by(ID matching)

    *saving temp file
    save temp/temp_data1_nn.dta, replace
end

********************************************************************************
*nearest neighbor: neighbors	
********************************************************************************
program define neighbors
    * Picks the nearest neighbors of the current treated firm. The score is
    * the sum of squared differences in the standardized matching variables
    * between each firm and the treated one. The selection is appended to
    * temp/temp_data2_nn.dta.
    display "nearest neighbor"

    *using previous data
    use temp/temp_data1_nn.dta, clear
    drop *_control_z

    *the score is accumulated one matching variable at a time
    generate score = 0
    capture ds *_control_*
    if "`r(varlist)'"!="" {
        capture ds *_control_*
        foreach v in `r(varlist)' {
            *standardization
            summarize `v'
            generate z_`v' = (`v'-`r(mean)')/`r(sd)' if `v'!=.

            *treated firm in the first row, so that row 1 is the reference
            capture drop minus_matching
            generate minus_matching = -matching
            sort minus_matching ID, stable
            capass matching[1]==1 , throw("matched firm not in first position")
            capass matching[2]!=1 , throw("unmatched firm equal to the matched one")

            *squared deviation vs treated; a missing variable voids the score
            replace score = (z_`v' - z_`v'[1])^2 + score
            replace score = . if z_`v'==.
        }
        drop if score==.

        if _N>0 {
            *the treated firm must still be first, with score zero
            capture drop minus_matching
            generate minus_matching = -matching
            sort minus_matching, stable

            capass score[1]==0 , throw("scores not sorted")
            sort score ID, stable
            sort minus_matching score ID, stable

            *selecting nearest neighbors (+1 because it includes treated)
            keep if _n <= $neigh + 1
            generate pos = _n - 1

            keep ID matching pos
            generate group = $firm_to_match
        }

        *appending the selections of the previous treated firms
        if $firm_to_match >1 {
            append using temp/temp_data2_nn.dta
        }

        *saving temp file
        save temp/temp_data2_nn.dta, replace
    }
end

********************************************************************************
**nearest neighbor: treated and control 
********************************************************************************
program define treated_and_control
    * Joins the working data with the selected neighbors, assigns the event
    * year of the treated firm to its controls, and keeps the groups that
    * have the treated firm and all its neighbors. On the main sample it also
    * sets the globals used for subsampling. Output: temp/temp_data4_nn.dta.
    display "treated and control "
    quietly {
        *using original data
        use temp/temp_data_nn.dta, clear
        capture drop _m

        *dealing with firms chosen more than once: one copy per group
        joinby ID using temp/temp_data2_nn.dta, unmatched(master)
        keep if group!=.

        *assigning event time to controls
        bysort group: egen event_year = mean(event)
        generate event_time = year - event_year

        *window of event times observed in the group
        capture drop max_t min_t
        bysort group: egen max_t = max(time)
        bysort group: egen min_t = min(time)
        keep if event_time>=min_t & event_time<=max_t

        *outcome for treated, copied to every row of the group and event time
        egen ids = group(ID group)
        capture drop temp
        generate temp = $var if matching==1
        bysort group event_time: egen treat_outcome = mean(temp)
        drop temp

        *dropping groups without treated or without the full set of neighbors
        drop if pos==0 & matching==0
        drop if matching==1 & pos>0
        bysort group: egen m_pos = max(pos)
        bysort group: egen m_pos2 = min(pos)
        keep if m_pos==$neigh
        keep if m_pos2==0

        if "$subsample" == "main" {
            distinct ids if matching ==1
            global n_treated = `r(ndistinct)'
            distinct ids if matching !=1
            global n_control = `r(ndistinct)'

            *number of treated and control in each sampling to construct conf intervals
            global N_1 = $n_treated
            global N_0 = $n_control

            *for sampling and constructing standard errors (see instructions in paper)

            *tuning parameter
            global R = 3

            *treated selected without replacement
            global B1 = round($R  * ( $N_1 )^0.5)

            *controls selected without replacement
            global B0 = round($R * $N_0 / ( $N_1 )^0.5)
        }

        *saving data
        save temp/temp_data4_nn.dta, replace
    }
end

********************************************************************************
**nearest neighbor: average treatment effects
********************************************************************************
program define treatment
    * Average difference between treated and control outcomes by event time,
    * normalized to zero at event time -1. The point estimate starts the
    * results file of the outcome; each subsample estimate is appended to it.
    display "average treatment effects"
    quietly {
        *using previous data
        use temp/temp_data4_nn.dta, clear
        if "$subsample" == "main" {
            global total_obs = _N
        }
        drop if matching==1

        *differences by event time: first within group, then across groups
        generate diff = treat_outcome - $var
        collapse diff , by(event_time group)
        collapse diff , by(event_time)
        generate outcome = "$var"
        drop if event_time==.

        *normalizing time=-1 to zero
        generate temp = diff if event_time==-1
        egen t_minus_1 = mean(temp)
        replace diff = diff - t_minus_1
        drop temp

        *point estimate: flagged and saved as a new file
        if $estimate_ == 1 {
            generate estimated_effect = 1
            save temp/temp_data_nn_${var}.dta, replace
        }

        *subsample estimate: added to the existing file
        if $estimate_ == 0 {
            append using temp/temp_data_nn_${var}.dta
            save temp/temp_data_nn_${var}.dta, replace
        }
    }
end

********************************************************************************
**nearest neighbor: subsamples for standard errors
********************************************************************************
program define subsamples
    * Draws the subsamples used for the standard errors. Each draw keeps B1
    * treated and B0 control firms chosen at random. All draws are stacked
    * and saved in temp/sub_samples_nn.dta with their subsample_id.
    quietly {
        *one row per firm with its treatment status
        use temp/temp_data_nn.dta, clear
        collapse treated, by(ID)
        keep ID treated

        *saving temp file
        compress
        save temp/temp_data3_nn.dta, replace

        tempfile temp_subsamples
    }

    forvalues draw = 1/ $n_subsamples {
        quietly {
            *using original data
            use temp/temp_data3_nn.dta, clear
            generate subsample_id = `draw'

            *random order within treatment status; the first rows are selected
            generate rand = runiform()
            sort treated rand, stable
            by treated (rand): generate in_sample = _n <= $B1 if treated==1
            by treated (rand): replace in_sample = _n <= $B0 if treated==0
            keep if in_sample==1

            *the draw must have exactly B1 treated and B0 controls
            count if in_sample & treated
            capass `r(N)' == $B1 , throw("subsample of treated \neq B1")
            count if in_sample & !treated
            capass `r(N)' == $B0 , throw("subsample of controls \neq B0")
            compress

            *stacking on top of the previous draws
            if `draw'>1 {
                append using `temp_subsamples'
            }

            save `temp_subsamples', replace
        }
    }

    *saving temp file
    quietly compress
    quietly save temp/sub_samples_nn.dta, replace
end

********************************************************************************
*nearest neighbor: confidence intervals
********************************************************************************
prog confidence_intervals
* Subsampling confidence intervals for the nearest-neighbor estimates.
* For every draw stored in temp/sub_samples_nn.dta the program rebuilds
* temp/temp_data_nn.dta from the full data and reruns nearest_neighbors,
* treated_and_control and treatment. The estimates collected by treatment
* are then recentred and rescaled, and the bounds are read off their
* empirical distribution within each event_time.
* Globals read: n_subsamples, B1, N_1, var, var_name, n_treated, n_control,
* total_obs. Output: temp/results_nn_ followed by the outcome name.
* NOTE(review): temp/temp_data_nn.dta is left holding the last subsample when
* the program ends; the full data stay in temp/temp_data_nn_full.dta.
*display "confidence intervals"
quiet{
*getting standard errors
global estimate_=0

*keeping a copy of the full data, since temp_data_nn.dta is overwritten below
use temp/temp_data_nn.dta, clear
save temp/temp_data_nn_full.dta, replace
}
display "Nearest Neighbors: Outcome ${var_name}. Total subsamples ${n_subsamples}. Subsample "
forvalue s=1/$n_subsamples {
quiet{

global subsample= `s'

*firms drawn for the current subsample
use temp/sub_samples_nn.dta, clear
keep if subsample_id == $subsample

*merging back with characteristics data
merge 1:m ID using temp/temp_data_nn_full.dta
keep if _m==3
drop _m 

*renumbering the treated firms of this subsample
capture drop treat_number 
egen treat_number=group(ID) if event!=.
summ treat_number
global tot_ids= `r(max)'

*new input data 
save temp/temp_data_nn.dta, replace
}

*nearest neighbors for each treated firm
	nearest_neighbors
	
*appending treated and control firms
	quietly treated_and_control	
	
*average treatment effects for each subsample
	quietly treatment
}

quiet{
*using the output of the program treatment
use  temp/temp_data_nn_${var}.dta, clear

*empirical CDF 
*coeff_hat is the mean of diff over the rows flagged estimated_effect==1
capture drop temp
gen temp= diff if estimated_effect==1
bys event_time: egen coeff_hat=mean(temp) 
drop if estimated_effect==1
drop estimated_effect temp t_minus_1
*subsample estimates recentred on coeff_hat and rescaled by sqrt(B1/N_1)
gen element=   (( $B1 )^0.5/( $N_1 )^0.5) * (diff-coeff_hat)+coeff_hat

*cdf: relative rank of each element within its event_time
sort event_time element, stable 
by event_time: gen pos=_n 
by event_time: gen temp_tot=_N
replace pos=pos/temp_tot

*bounds: for each level, the element whose relative rank is closest to the
*target probability (the mean is taken when several rows tie)
*each entry is: variable suffix, lower probability, upper probability
foreach spec in "1p 0.01 0.99" "5p 0.025 0.975" "10p 0.05 0.95" {
	local suffix  : word 1 of `spec'
	local p_lower : word 2 of `spec'
	local p_upper : word 3 of `spec'
	foreach side in lower upper {
		local target `p_`side''
		gen temp=abs(pos-`target')
		bys event_time: egen temp_closest=min(temp)
		gen temp_element= element if temp_closest==temp
		bys event_time: egen `side'_bound_`suffix'=mean(temp_element)
		capture drop temp*
	}
}

*coeff and bounds
collapse coeff_hat lower_bound* upper_bound* , by(event_time)
*figures 
global name "$var_name"
*axis title: outcome name with every underscore turned into a space
local outcome="$var_name"
local outcome: subinstr local outcome "_" " ", all
global outcome="`outcome'"
*NOTE(review): the outcome names set in matching are TFP_CD_OLS and
*Corp_Sales_to_Others; confirm var_name can still take the two values tested
*below, otherwise figures is never called
if "$var_name" == "CD_OLS" {
figures
global fig_num "fig3e"
}
if "$var_name" == "Trans_to_Other_Firms" {
figures
global fig_num "fig3f"
}
capture keep if mod(event_time, 1) == 0



gen var_name="${var_name}"
gen n_treated=${n_treated}
gen n_control=${n_control}
gen total_obs=${total_obs}
keep if event_time>=-4 & event_time<=4

*saving 
save temp/results_nn_${var_name}.dta, replace 
} 	

end
}

**********************************
*matching controls: variables that we will use for the matching 
**********************************
prog matching_controls
* Builds the firm-year covariates used for matching and merges them into the
* analysis data. Four groups of controls are built in turn and saved under
* the temp folder: revec_controls (trade status, wages, entry), EE_controls
* (worker education and experience), trans_controls (buyer-seller network)
* and temp_control_vars (the three merged). The final data set is saved as
* temp/analysis0.dta.
* Globals read: revec_group, occup, MNCs, analysis_data, vars, path0.
set seed 123 

********
* from revec data  
********
*importer, exporter, firm age, wagebill per worker 
use $revec_group, clear
keep ID ingresosir_* imports_* exports_* trabaj_* salarios_* entry 
*reshaping from one column per year to one row per firm-year
reshape long ingresosir_ imports_ exports_ trabaj_ salarios_ , i(ID entry) j(year)
drop if ingresosir_==.
*dummies equal to one when the firm-year has positive, non-missing flows
gen exporter=(exports>0 & exports!=.)
gen importer=(imports>0 & imports!=.)
*log of salarios_ minus log of trabaj_ (labelled Average wages further below)
gen w=log(salarios)-log(trabaj)
*keeping relevant variables 
keep ID year w importer exporter entry
*saving 
compress
save temp/revec_controls.dta, replace 
********
* from EE data 
********
*occupations - education
use $occup, clear
*NOTE(review): this truncation has no lasting effect, code_str is dropped on the next line
replace code_str=substr(code_str,1,50)
drop educ_str_esp educ_str_eng  code_str
compress
save temp/occupations.dta, replace

*list of mncs 
use $MNCs, clear
keep ID 
gen MNC_firm=1
destring ID, replace
format ID %16.0g
tempfile mncs 
save `mncs', replace 

*share of college workers
*first, we need to bring the employer-employee data (same code as in other managers exercise)
*Z-MEED.do is only run when temp/ee_full.dta does not exist yet
capture confirm file temp/ee_full.dta
if _rc!=0 {
do ${path0}/all_codes/Z-MEED.do		
}
*using output data from Z-MEED
use temp/ee_full, clear
*flagging the rows whose employer is in the MNC list (using-only rows are kept too)
merge m:1 ID using `mncs'
drop _m

*panel 
*one row per worker-year: the last row after sorting on sala within year and worker
sort year num_identi sala, stable 
by year  num_identi: gen pos=_n
by year  num_identi: gen num=_N
keep if num==pos
drop num pos 
xtset num_identi year, y

*working for MNC in the past 
*equal to one when any of the previous 11 yearly lags of the worker has MNC_firm==1
*NOTE(review): the expression lags resolves to a variable before a scalar if a
*variable with that name or abbreviation exists - confirm none does
gen experience_MNC=0
scalar lags=2017-2007+1
forvalue i=1/`=lags' {
	replace experience_MNC=1 if l`i'.MNC_firm==1
}

*working for a supplier of MNCs in the past
*the list of suppliers and their event year is built aside and merged back
preserve
use $analysis_data, clear
*firms flagged past==1 get event year 2008
replace event=2008 if past==1
*one row per firm
bys ID: gen dup=cond(_N==1,0,_n)
drop if dup>1
keep if event!=.
keep ID event 
destring ID , replace 
tempfile suppliers
save `suppliers', replace 
restore
*merging suppliers 
merge m:1 ID using `suppliers'
drop _m
*worker worked for supplier
*flag is set for worker-years strictly after the employer's event year
gen temp=1 if year>event & event!=.
*first time it happens for the worker 
replace temp=temp*year
bys num_identi: egen temp2= min(temp)
*years when worker has experience
*a missing temp2 (never at a supplier) gives 0 because missing compares as largest
gen experience_suppliers=(year>=temp2)
drop temp*
*educ 
rename tip_ocu occup
keep ID year occup experience_MNC experience_suppliers
*every occupation code must be found in the occupations file
merge m:1 occup using temp/occupations.dta
assert _m==3
drop _m
*edu is one for the education categories lic, bach and per
gen edu=0
replace edu=1 if (educ_cat=="lic" | educ_cat=="bach" | educ_cat=="per")
gen workers=1
drop if year<2008
*summing across observations for each firm
collapse (sum) workers edu experience_MNC experience_suppliers, by( ID year )
tostring ID , replace format(%24.0g)
*shares over the number of worker rows of the firm-year
gen sh_educ=edu/workers 
gen sh_experience=experience_MNC/workers
*the experience counts become dummies: at least one such worker
replace experience_MNC=(experience_MNC>0)
replace experience_suppliers=(experience_suppliers>0)
keep ID year sh* experience*
save temp/EE_controls.dta, replace

********
* from transactions data 
********
*supplies to large dom firm, suppliers to exporter

*Big domestic firms
	sample_bigdom_matching
*Exporters  	
	sample_exp_matching
	
*suppliers to those firms
*combining the two transaction files written by the programs above
use temp/temp_bigdom.dta , clear 
merge 1:1 buyer seller year using temp/temp_exp.dta
drop _m 
*length 
*rows with a missing buyer or seller get no pair id and are dropped
egen pair=group(buyer seller)
drop if pair==.
gen cum_length_buyers=1
xtset pair year, y
tempfile temp_together 
save `temp_together', replace 

*for each year q, number of rows of each pair dated q or earlier
forvalue q=2008/2017{
	use `temp_together', clear 
	drop if year>`q'
	collapse (sum) cum_length_buyers , by(pair)
	gen year=`q'
	tempfile cum_`q'
	save `cum_`q'', replace 
}
*stacking the yearly counts and keeping only the pair-years that exist in the data
clear 
forvalue q=2008/2017{
	append using `cum_`q''
}	
merge 1:1 pair year using `temp_together'
keep if _m==3 
drop _m
*number of suppliers  
*rows per buyer-year, saved aside under the buyer's ID
preserve
gen n_suppliers=1
collapse (sum) n_suppliers , by(buyer year)
rename buyer ID
tempfile n_suppliers
save `n_suppliers', replace 
restore 
*number of clients 
*rows per seller-year
bys seller year: gen n_clients=_N
*suppliers to big domestic firms or exporter
bys seller year: egen supp_bigdom=max(buyer_bigdom)	
bys seller year: egen supp_exp=max(buyer_exp)
*average over the seller's buyers of the cumulative relationship length
bys seller year: egen cum_length_buyers_avg= mean(cum_length_buyers)	
drop cum_length_buyers
rename cum_length_buyers_avg cum_length_buyers	
keep seller year supp_bigdom supp_exp n_* cum_length* 
rename seller ID 
*one row per seller-year
bys ID year: gen dup=cond(_N==1,0, _n)
drop if dup>1
merge 1:1 ID year using `n_suppliers'
drop _m dup
save temp/trans_controls.dta , replace 
	
********
* merging the set of controls 
********	
use temp/revec_controls.dta, clear
merge 1:1 ID year using temp/trans_controls.dta
drop _m
*firm-years that appear only in the EE controls are dropped
merge 1:1 ID year using temp/EE_controls.dta
drop if _m==2
drop _m 
*missing controls are set to zero; capture skips any variable of the list that fails
foreach v in $vars  {
capture replace `v'=0 if `v'==.
}
compress 
save temp/temp_control_vars.dta , replace 

*analysis data 
use $analysis_data , clear
*dropping the versions of these variables already in the analysis data, if any
capture drop entry importer exporter n_clients n_suppliers
merge 1:1 ID year using temp/temp_control_vars.dta
replace n_suppliers=0 if n_suppliers==.
*only firm-years present in both data sets are kept
keep if _m==3 
drop _m 
gen y_l=y-l
label var y "Log sales"
label var l "Log employment"
label var w "Average wages"
label var sh_educ "Share of college workers"
label var experience_MNC "Employees w/ exp. at MNCs"
label var experience_suppliers "Employees w/ exp. at supp. of MNCs"
label var entry "Year of firm entry"
label var exporter "Share of exporters"
label var importer "Share of importers"
label var n_clients "Number of clients"
label var n_suppliers "Number of suppliers"
label var supp_bigdom "Suppliers to big domestic firms"
label var supp_exp "Suppliers to exporters" 
label var cum_length_buyers "Average duration (years) with buyers"

*standardizing names of variables
*each rename is skipped silently when its source variable is absent or the
*target name is already taken (two of them target trans_others)
capture rename l_not_MNC_event sales_others
capture rename l_clients_non_event other_clients
capture rename l_not_MNC_event_2 trans_others
capture rename l_ave2 ave_trans
capture replace other_clients=. if trans_others==.
capture rename log_trans_not_GOV trans_others

save temp/analysis0.dta , replace 

end

********************************************************************************
*
*matching controls: details
*
********************************************************************************
{
*************************
*selecting big domestic firms (similar to the tables of appendix E)
*************************
prog sample_bigdom_matching
* Writes temp/temp_bigdom.dta: the deflated transactions of the sellers found
* in the analysis data, with buyer_bigdom flagging purchases made by big
* domestic firms (median of trabaj_ above 100). Sellers that are themselves
* big firms lose their transaction rows.
display "selecting Big Dom firms"

quiet{
set seed 123 

*seller-side information, keyed by seller and year
use $analysis_data, clear
keep event past ID year ingresos
rename * seller_*
rename seller_ID seller
rename seller_year year
tempfile seller_events
save `seller_events', replace

*list of big firms, one row each
use $analysis_data, clear
bys ID: egen med_workers=median(trabaj_)
keep if med_w>100
distinct ID
bys ID: keep if _n==1
keep ID   
*the same list is stored twice, keyed as seller and as buyer
rename ID seller
tempfile big_sellers
save `big_sellers', replace
rename seller buyer
tempfile big_buyers
save `big_buyers', replace

*transactions in real terms, in millions
use $transactions, clear
drop if pot==1
forvalues yr=2008/2017 {
	replace trans=(100*(trans)/${ipc`yr'})/1000000 if year==`yr'
}
drop pot

*flagging big buyers; big firms without any purchase are not added
merge m:1 buyer using `big_buyers', keep(master match)
gen buyer_bigdom=(_merge==3)
drop _merge
*only sellers present in the analysis data
merge m:1 seller year using `seller_events', keep(match) nogenerate
*excluding the large buyers when they are sellers
*unmatched rows of the big-firm list stay in the data, as before
merge m:1 seller using `big_sellers', keep(master using) nogenerate

}
*saving temp data
save temp/temp_bigdom.dta , replace
end 

*************************
*selecting exporting firms (similar to the tables of appendix E)
*************************
prog sample_exp_matching
* Writes temp/temp_exp.dta: the deflated transactions of the sellers found in
* the analysis data, with buyer_exp flagging purchases made by firms that
* export in every year they are observed. Sellers that are themselves such
* exporters lose their transaction rows.
display "selecting exporting firms"

quiet{
set seed 123 

*seller-side information, keyed by seller and year
use $analysis_data, clear
keep event past ID year ingresos
rename * seller_*
rename seller_ID seller
rename seller_year year
tempfile seller_events
save `seller_events', replace

*list of permanent exporters, one row each
use $analysis_data, clear
gen byte exports_now=(exports>0 & exports!=.)
*the minimum of the yearly dummy is one only when the firm exports in all its years
bys ID: egen always_exports=min(exports_now)
keep if always_exports==1
bys ID: keep if _n==1
keep ID   
*the same list is stored twice, keyed as seller and as buyer
rename ID seller
tempfile exp_sellers
save `exp_sellers', replace
rename seller buyer
tempfile exp_buyers
save `exp_buyers', replace

*transactions in real terms, in millions
use $transactions, clear
drop if pot==1
forvalues yr=2008/2017 {
	replace trans=(100*(trans)/${ipc`yr'})/1000000 if year==`yr'
}
drop pot

*flagging exporting buyers; exporters without any purchase are not added
merge m:1 buyer using `exp_buyers', keep(master match)
gen buyer_exp=(_merge==3)
drop _merge
*only sellers present in the analysis data
merge m:1 seller year using `seller_events', keep(match) nogenerate
*excluding the buyers when they are sellers
*unmatched rows of the exporter list stay in the data, as before
merge m:1 seller using `exp_sellers', keep(master using) nogenerate

}
*saving temp data
save temp/temp_exp.dta , replace
end 
}
********************************************************************************
*
*auxiliary codes
*
********************************************************************************
{
    
********************************************************************************
*propensity score: descriptives 
********************************************************************************
prog desc_samples
* Descriptive tables comparing treated and control firms, before and after
* propensity-score matching. Both tables are produced by myttests with
* by(treated), so column 1 holds the firms with the lower value of treated
* (the controls) and column 2 the treated firms; the column headers follow
* that order.
* Globals read: vars, procos, neigh, path0. The global vars is rewritten.
* Output: Supplementary_Table_D2 tex files under results/4-appendix_d.
****
**** unmatched sample 
**** 
display "Descriptives unmatched sample"
quiet{
*removing proco_score from the list of controls
*NOTE(review): unab expands the list against the data in memory when the
*program is called, before analysis0.dta is loaded - confirm this is intended
capture unab varlist : $vars
capture unab exclude : proco_score
local varlist : list varlist - exclude
global vars `varlist'

use temp/analysis0.dta, clear 

*treated firms in main regression 
*firms without any residual from the main specification are dropped
reghdfe y  D_* if past!=1 , absorb(sector4#year#province IDs ) resid(res)
bys ID: egen mean_res=mean(res)
drop if mean_res==.
drop *res*

drop if past==1
*event-year row for treated firms, all rows for firms without an event
keep if year==event | event==.

*broad sector
tab sector_g, gen(sects)
eststo clear
*firms in San Jose
gen san_jose=(province==7)

myttests sects* san_jose $vars , by(treated) 
distinct ID if treated==0
scalar events_control=r(ndistinct)
distinct ID if treated==1
scalar events_treated=r(ndistinct)
estadd scalar events_control
estadd scalar events_treated
}
display "column 1: Controls, column 2: treated"
*column labels follow the group order of ttest: controls first, treated second
esttab using ${path0}/results/4-appendix_d/Supplementary_Table_D2_a.tex, nomtitle collabels("Control" "Treated")  ///
    cells("mu_1(fmt(a3)) mu_2" ///
	"mu_1_se(par fmt(2)) mu_2_se(par fmt(2))" ) ///
	stats(events_control  events_treated , fmt(0 0) labels("\# Controls" "\# Treated" )) label ///
	replace type note("column 1: Controls, column 2: treated")	
	
****
**** matched sample 
**** 
display "Descriptives matched sample"
quiet{
use temp/temp_data3_ps.dta , clear	
keep if year==event	
*complete matched groups only: one treated firm plus its neighbors
*the group size follows the global neigh, with 4 as fallback when it is not set
local group_size = 4
if "$neigh" != "" {
local group_size = $neigh + 1
}
bys case: gen num=_N
keep if num==`group_size'	

if "$procos" != "" {
global vars "$vars proco_score"
}

*broad sector
tab sector_g, gen(sects)
eststo clear
*firms in San Jose
gen san_jose=(province==7)

myttests sects* san_jose $vars , by(treated) 
*a control firm is counted once per matched group it belongs to
egen g=group(ID case)
distinct g if treated==0
scalar events_control=r(ndistinct)
distinct ID if treated==1
scalar events_treated=r(ndistinct)
estadd scalar events_control
estadd scalar events_treated

*table letter depends on the score variant
global tab_num "b"
if "${procos}"=="_zdiagn"{
global tab_num "c"
}
if "${procos}"=="_zdiagn2"{
global tab_num "d"
}
}
 
display "column 1: Controls, column 2: treated"
*Diff is column 1 minus column 2, as computed by myttests
esttab using ${path0}/results/4-appendix_d/Supplementary_Table_D2_${tab_num}2.tex, nomtitle nonumbers  ///
    cells("mu_1(fmt(a3)) mu_2 d(star pvalue(d_p))" ///
	"mu_1_se(par fmt(2)) mu_2_se(par fmt(2)) d_se(par fmt(2))" ) ///
	stats(events_control  events_treated , fmt(0 0) labels("\# Controls" "\# Treated" )) label collabels("Control" "Treated" "Diff") ///
	replace type note("column 1: Controls, column 2: treated")	
	
end 

********************************************************************************
*propensity/proco score: closest in propensity score 
********************************************************************************
prog ps_controls
* For every treated firm in the data in memory, picks the control firms of
* the same sector whose score is closest to the treated firm's score and
* appends the group (treated firm plus neighbors) to temp/temp_data2_ps.dta.
* Groups with fewer rows than the global neigh plus one are not saved.
* Globals read: neigh, year_, min_event, procos.
* The data in memory are left unchanged by the loop (preserve and restore),
* apart from the rename of proco_score.
*display "$ps_type score: closest in $ps_type score"

quiet{
*standardize naming 
capture rename proco_score pred_prob

*foreach treated finding controls
*treated_n is an abbreviation, presumably of treated_number used below
summ treated_n
local max_id = `r(max)'
}
display "prop score for year $year_. Number of treated firms `max_id'"

*loop over treated firms
forvalue p=1/ `max_id' {
quiet{
preserve
*firm to match
gen matching= (treated_number== `p' )
*no other treated
drop if event!=. & matching==0

*check only one treated firm 
distinct ID if event!=.
capass `r(ndistinct)'==1 , throw("more than one treated firm at a time")

*searching for controls within the same 2-digit sector
if "$procos" == "_zdiagn2" {
*broad sector when we only use firms with procomer score
capture drop sector4
gen sector4=sector_g
}
*sect carries the sector of the treated firm to every row
gen temp_sect=sector4 if matching ==1
egen sect=mean(temp_sect)
drop temp*
keep if sect==sector4
*diff between propensity score between treated and all potential controls
*the treated firm is moved to the first row so that pred_prob[1] is its score
capture drop minus_matching
gen minus_matching=-matching 
sort minus_matching, stable  
gen score= pred_prob
replace score= abs(score-pred_prob[1])
*the stable sort keeps the treated firm (distance zero) ahead of any tie
*NOTE(review): rows with a missing score sort last and can still be picked
*when the sector has few firms - confirm this is acceptable
sort score , stable 
 

*selecting nearest neighbors (+ 1 because it includes treated too)
keep if _n<= $neigh +1 

*saving
*only complete groups are written
if _N==$neigh +1 {
keep ID matching 
gen event= $year_
gen case="`p'_$year_"
*the very first group (first firm of the first event year) starts the file
*NOTE(review): if that first group is incomplete nothing is saved for it and
*the next append reads whatever file is already on disk - confirm
if `p' >1 | $year_ > $min_event {
*appending to previous group of treated and controls
append using temp/temp_data2_ps.dta
}

*saving temp file
save temp/temp_data2_ps.dta, replace 
}
restore
}
}
end

********************************************************************************
*propensity/proco score: regressions preparations
********************************************************************************
prog ps_reg_prep
* Prepares the matched sample for the event-study regressions: loads
* temp/temp_data_ps.dta, builds the event-time dummies D (all firms) and
* T (D interacted with matching) and saves temp/temp_data3_ps.dta.
* Dummies are named with the suffix mK for K years before the event and K
* for K years after; the first and last years of the window absorb all
* earlier and later years. The year before the event is the omitted category.
* Globals written: first_year, last_year, dummies0, dummies1.
display "$ps_type score: regressions preparations"
*using raw data
use temp/temp_data_ps.dta , clear
 
*creating event dummies 
capture gen matching=(event!=.)
capture drop D_*
capture drop T_*
capture drop time
gen time=year-event
global first_year=-5
global last_year=5
forvalue i= $first_year (1) $last_year{
if `i'<0{
*a local macro holds the number of years before the event; a scalar used
*inside an inline expression would be shadowed by any variable of that name
local lead=abs(`i')
gen D_m`lead' =( time==`i')
label var D_m`lead' "t $ =`i'$"
gen T_m`lead' =matching*( time==`i')
label var T_m`lead' "t $ =`i'$"
if `i'==$first_year {
*first year of the window: all earlier years are binned here
replace D_m`lead' =( time<=`i' & time!=.)
label var D_m`lead' "t $\leq `i'$"
replace T_m`lead' =matching*( time<=`i' & time!=.)
label var T_m`lead' "t $\leq `i'$"
}
}
if `i'>=0{
gen D_`i' = (time==`i')
label var D_`i' "t $ =`i'$"
gen T_`i' = matching*(time==`i')
label var T_`i' "t $ =`i'$"
if `i'==$last_year{
*last year of the window: all later years are binned here
replace D_`i' = (time>=`i'  & time!=.)
label var D_`i' "t $\geq `i'$"
replace T_`i' = matching*(time>=`i'  & time!=.)
label var T_`i' "t $\geq `i'$"
}
}
}
*omitted category: the year before the event
drop D_m1 T_m1
order D_* T_* , last
*lists of the dummies from four years before to four years after the event
ds D_m4-D_4
global dummies0 `r(varlist)'
ds T_m4-T_4
global dummies1 `r(varlist)'

*saving data
save temp/temp_data3_ps.dta, replace
end
	
********************************************************************************
*propensity/proco score: regressions results
********************************************************************************
prog ps_reg
* Runs the matched-sample regression for every outcome between the globals
* first_outcome and last_outcome and stores each result as ps, followed by
* the value of procos, an underscore and the outcome number.
* Globals read: outcome_ and name_ entries, fixed_effects, SE, procos.
* Globals written: var, name, REG, quant.
display "$ps_type score: regressions results"
quiet{
*use previous data
use temp/temp_data3_ps.dta , clear
capture egen IDs=group(ID)

forvalue i= $first_outcome / $last_outcome {

*var and names 
global var ${outcome_`i'}
global name ${name_`i'}

*regressions
global REG "reghdfe $var  D_* T_*  , absorb( $fixed_effects ) vce( $SE )"
*count outcomes: run_reg_match then reports the mean of the dependent
*variable without the division by 500 used for money outcomes.
*The list holds both the earlier names and the names currently set in
*matching for the employment and number-of-buyers outcomes.
if inlist("$name", "Employment", "Number_of_Other_Clients", "Number_of_Workers", "Number_of_Corp_Buyers") {
global quant "1"
}
run_reg_match
estimates store ps${procos}_`i'

}
}
end

*************************
*program to compare the means 
*************************
program myttests, eclass
     * Runs a two-group ttest for every variable of varlist and posts the
     * results as e-class matrices with one column per variable:
     * mu_1 mu_2 (group means), n_1 n_2 (group sizes), mu_1_se mu_2_se
     * (filled with the group standard deviations r(sd_1) and r(sd_2)),
     * d (mu_1 minus mu_2), d_se, d_t and d_p.
     * Extra options are passed on to ttest.
     syntax varlist [if] [in], by(varname) [ * ]
     marksample touse
     markout `touse' `by'
     * names of the posted matrices and, in the same order, the ttest result
     * that fills each of them
     local stat_names "mu_1 mu_2 n_1 n_2 mu_1_se mu_2_se d d_se d_t d_p"
     local stat_exprs "r(mu_1) r(mu_2) r(N_1) r(N_2) r(sd_1) r(sd_2) r(mu_1)-r(mu_2) r(se) r(t) r(p)"
     local n_stats : word count `stat_names'
     tempname `stat_names'
     foreach x of local varlist {
         quietly ttest `x' if `touse', by(`by') `options'
         * one more column in every matrix
         forvalues k = 1/`n_stats' {
             local s : word `k' of `stat_names'
             local e : word `k' of `stat_exprs'
             matrix ``s'' = nullmat(``s''), `e'
         }
     }
     foreach s of local stat_names {
         matrix colnames ``s'' = `varlist'
     }
     * placeholder coefficient vector and variance matrix, all zeros
     tempname b V
     matrix `b' = `mu_1'*0
     matrix `V' = `b''*`b'
     ereturn post `b' `V'
     ereturn local cmd "myttests"
     foreach s of local stat_names {
         ereturn matrix `s' = ``s''
     }
 end 
 
********************************************************************************
*running regression specification 
********************************************************************************
prog run_reg_match
* Runs the regression command held in the global REG, saving its residuals
* as resid, and attaches to the estimates: n_obs, n_firms, mean_dep, sd_dep,
* n_fe, n_treated and n_control. All counts and moments use the rows with a
* non-missing residual. The global quant is reset to empty on exit.
*clearing leftovers from a previous call
foreach leftover in resid dep id {
	capture drop `leftover'
}
*regression 
$REG resid(resid) 
 
* add number of observations 
scalar n_obs=e(N)
estadd scalar n_obs

* add distinct number of firms
distinct IDs if !missing(resid)
scalar n_firms=r(ndistinct)
estadd scalar n_firms 

* add mean and sd of dependent variable 
*the dependent variable is exponentiated; unless quant is 1 the result is
*also divided by 500 (in US dollars)
global dep_var "`e(depvar)'"
if "$quant" == "1" {
	gen dep=exp( $dep_var )
}
else if "$quant" == "" {
	gen dep=exp( $dep_var )/500
}
global quant ""
quietly summarize dep if !missing(resid)
scalar mean_dep=r(mean)
scalar sd_dep=r(sd)
estadd scalar mean_dep
estadd scalar sd_dep

*add number of fixed effects 
scalar n_fe=e(df_a_initial)
estadd scalar n_fe

*number of treated and controls
*a firm is counted once per matched group it belongs to
egen id=group(IDs case)
distinct id if !missing(resid) & matching==1
scalar n_treated=r(ndistinct)
distinct id if !missing(resid) & matching==0
scalar n_control=r(ndistinct)
estadd scalar n_treated
estadd scalar n_control

foreach leftover in dep id {
	capture drop `leftover'
}
end

********************************************************************************
*code producing the figures 
********************************************************************************
prog figures
* Draws the event-study figure for the data in memory: point estimates
* (coeff_hat) with capped intervals, four years before to four years after
* the event. Globals read: name (selects the y-axis range global) and
* outcome (y-axis title). The restriction to the event window is applied
* before preserve, so it stays in the caller's data.
display "figures"
keep if inrange(event_time, -4, 4)
preserve
*the 5 percent bounds are plotted when they exist under that name
foreach side in upper lower {
	capture rename `side'_bound_5p `side'_bound
}
global ranges "${range_${name}}"

*figure for each outcome 
twoway (rcap upper_bound lower_bound event_time , lc(emerald) lpattern(solid)) ///
	(scatter coeff_hat event_time , mc(emerald) msymbol(O) ///
		ytitle("Coefficients: $outcome ", size(medlarge)) bgcolor(white) ///
		xtitle("Years Since First MNC Interaction", size(medlarge)) ///
		graphregion(fcolor(white)) yline(0) legend(off) ///
		ylabel( $ranges ,labsize(large)) ///
		xlabels(-4(1)4, labsize(large)))
restore
end

quiet{
* Constants defined when the do-file is run, before the main program is
* called, so that the programs above can read them as globals.
***  PRODUCER PRICE INDEX  (yearly average) 2013 ==100   
* each value is the index of the year divided by the 2013 value (92.88) and
* multiplied by 100. The ipc globals for 2008 to 2017 are used by
* sample_bigdom_matching and sample_exp_matching to deflate trans.
global ipc2005 = 50.99*100/92.88
global ipc2006 = 57.96*100/92.88
global ipc2007 = 63.38*100/92.88
global ipc2008 = 75.43*100/92.88
global ipc2009 = 77.53*100/92.88
global ipc2010 = 81.15*100/92.88
global ipc2011 = 85.57*100/92.88
global ipc2012 = 89.78*100/92.88
global ipc2013 = 92.88*100/92.88
global ipc2014 = 98.99*100/92.88
global ipc2015 = 98.93*100/92.88
global ipc2016 = 100.08*100/92.88
global ipc2017 = 100.77*100/92.88

********************************* 
***  AVERAGE EXCHANGE RATE    ***  
*********************************
* 1 March of each year: average compra-venta
* NOTE(review): the tc globals are not referenced by the programs shown in
* this part of the file; run_reg_match divides by a fixed 500 instead
global tc_2005=477.76578
global tc_2006=511.29055
global tc_2007=516.59063
global tc_2008=526.35683
global tc_2009=573.35611
global tc_2010=525.68364
global tc_2011=505.69
global tc_2012=502.8868
global tc_2013=499.75405
global tc_2014=538.36123
global tc_2015=534.55408
global tc_2016=544.76658
global tc_2017=567.55509
}
}
******************************************************************************** 
***  EXECUTE THE MAIN PROGRAM (matching)
********************************************************************************
matching 

* removing the intermediate files; a file that does not exist is skipped
foreach f in revec_controls EE_controls trans_controls occupations ///
	temp_bigdom temp_exp temp_control_vars temp_scores ///
	temp_data_ps temp_data1_ps temp_data2_ps temp_data3_ps ///
	temp_data_nn temp_data_nn_full temp_data1_nn temp_data2_nn ///
	temp_data3_nn temp_data4_nn sub_samples_nn {
	capture erase temp/`f'.dta
}
